%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
from sgmtradingcore.analytics.metrics import flat_capital_metrics
from stratagemdataprocessing.dbutils.mongo import MongoPersister
# Plot appearance shared by every figure produced below.
plt.style.use('ggplot')
plt.rcParams.update({'figure.figsize': (16, 8)})

# Open the Mongo database and keep a handle on the collection that holds
# one document per strategy run.
db_name, collection_name = 'backtesting', 'strategy_results'
mongo_db = MongoPersister.init_from_config(db_name, auto_connect=True)
coll = mongo_db[collection_name]
def to_dataframe(orders):
    """Flatten the settled orders in *orders* into one DataFrame row each.

    Only orders whose ``status_str`` is ``'SETTLED'`` are kept. The columns
    ``placed_time``, ``size``, ``price`` and ``date_day`` are renamed to
    ``dt``, ``stake``, ``odds`` and ``date``. The nested ``details`` dict is
    unpacked into ``reason``, ``pair_key`` (as a tuple), ``portfolio_id``,
    ``plausible_returns`` and ``weight`` and then dropped. A constant
    ``capital`` of 10000 and a boolean ``is_back`` column are added.

    :param orders: iterable of order dicts.
    :return: pandas DataFrame with one row per settled order.
    """
    def single_sticker_weight(details):
        # Only 'OpenTradeSingleSticker' intentions carry a weight: the first
        # signal's value for the sticker named by the intention.
        intention = details['trade_intention']
        if intention['name'] != 'OpenTradeSingleSticker':
            return None
        return details['signals'][0]['value'][intention['sticker']]

    settled_orders = [order for order in orders if order['status_str'] == 'SETTLED']

    source_columns = ['placed_time', 'pnl', 'size', 'bet_side', 'price',
                      'date_day', 'event_id', 'sticker', 'details']
    new_names = {'placed_time': 'dt', 'size': 'stake', 'price': 'odds', 'date_day': 'date'}
    frame = pd.DataFrame(settled_orders, columns=source_columns)
    frame = frame.rename(columns=new_names)

    # Pull the nested dicts out of the frame; they are unpacked column by
    # column below and must not appear in the result.
    details = frame.pop('details')

    frame['is_back'] = frame['bet_side'] == 'back'
    frame['capital'] = 10000
    frame['reason'] = details.apply(lambda d: d.get('reason'))
    frame['pair_key'] = details.apply(lambda d: tuple(d['pair_key']))
    frame['portfolio_id'] = details.apply(lambda d: d['portfolio_id'])
    frame['plausible_returns'] = details.apply(lambda d: d.get('plausible_returns', []))
    frame['weight'] = details.apply(single_sticker_weight)
    return frame
# Identifiers of the strategy run(s) whose orders are analysed below.
name = 'coint'
desc = 'cochrane_orcutt'
code = 'ft12_ftps.nba'
mnemonic = 'ts.mmp_entryexit'
trading_id = '562f5bef497aee1c22000001'
config_id = '5a00cd4d9316de5683386d49'

# The trading-user filter is currently disabled, so `trading_id` is unused.
result_query = {
    'strategy_name': name,
    'strategy_desc': desc,
    'strategy_code': code,
    # 'trading_user_id': trading_id,
    'mnemonic': mnemonic,
    'config_id': config_id,
}
rows = list(coll.find(result_query))

# One orders query per matching result document; results that report no
# orders are skipped without querying.
order_frames = []
for r in rows:
    if r['n_orders'] > 0:
        result_orders = list(mongo_db['orders'].find({'strategy_result_id': str(r['_id'])}))
        order_frames.append(to_dataframe(result_orders))
df = pd.concat(order_frames)
# Tag every order with the month number of its placement time, compute the
# metrics per month, and keep a transposed copy on disk.
df['month'] = df['dt'].dt.month
monthly_metrics = flat_capital_metrics(df, groupby='month')
metrics = monthly_metrics.T
metrics.to_csv('/tmp/metrics.csv')
# Bare expression: rendered only as the last statement of a notebook cell.
metrics
# Histogram of total PnL with the orders grouped by date.
daily_metrics = flat_capital_metrics(df, groupby='date')
daily_metrics['total_pnl'].hist()
plt.title('Distribution of total daily PnL.')
plt.xlabel('PnL')
plt.ylabel('Frequency')

# Histogram of total PnL with the orders grouped by event.
event_metrics = flat_capital_metrics(df, groupby='event_id')
event_metrics['total_pnl'].hist()
plt.title('Distribution of event PnL.')
plt.xlabel('PnL')
plt.ylabel('Frequency')
# Per-portfolio summary, indexed by portfolio_id:
#   pnl    - sum of the PnL of all orders in the portfolio
#   age    - whole seconds between the first and last order placement
#   reason - `reason` of the last order placed
gps = df.groupby('portfolio_id')
portfolio_rows = []
for portfolio_id in gps.groups:
    trades = gps.get_group(portfolio_id).sort_values('dt')
    placed = trades['dt']
    portfolio_rows.append((
        portfolio_id,
        trades['pnl'].sum(),
        (placed.iloc[-1] - placed.iloc[0]).total_seconds(),
        trades['reason'].iloc[-1],
    ))

# Build the frame in one go instead of enlarging it row by row with .loc,
# which copies the whole frame on every insertion.
pdf = pd.DataFrame(
    portfolio_rows, columns=['portfolio_id', 'pnl', 'age', 'reason']
).set_index('portfolio_id')

# The builtin `int` replaces the `np.int` alias, which NumPy deprecated in
# 1.20 and removed in 1.24; the resulting dtype is the same.
pdf['age'] = pdf['age'].astype(int)
# Bare expression: rendered only as the last statement of a notebook cell.
pdf.describe().T
# Portfolios with the smallest and the largest total PnL. print() with a
# single parenthesised string emits the same text under both the Python 2
# print statement and the Python 3 print function.
# The bare frame expressions are rendered only when they are the last
# statement of a notebook cell.
print('Minimum PnL:')
pdf[pdf.pnl == pdf.pnl.min()][['pnl', 'age']]
print('Maximum PnL:')
pdf[pdf.pnl == pdf.pnl.max()][['pnl', 'age']]
# Side-by-side histograms of portfolio age and PnL.
# The axis labels below are assigned by position, so the columns are
# selected in alphabetical order: DataFrame.hist then draws 'age' first
# whether the installed pandas sorts the columns or keeps frame order.
axes = pdf[['age', 'pnl']].hist()[0]
plt.suptitle('Distribution of portfolio age and PnL')
axes[0].set_xlabel('Age [s]')
axes[0].set_ylabel('Frequency')
axes[1].set_xlabel('PnL')
axes[1].set_ylabel('Frequency')
# Overlay the PnL histograms of the youngest and the oldest quarter of
# portfolios (strictly below q25 / strictly above q75 of age).
age_q25 = pdf.age.quantile(0.25)
age_q75 = pdf.age.quantile(0.75)
youngest_pnl = pdf[pdf.age < age_q25]['pnl']
oldest_pnl = pdf[pdf.age > age_q75]['pnl']
youngest_pnl.hist(alpha=0.75)
oldest_pnl.hist(alpha=0.75)
plt.legend(['< q25', '> q75'])
plt.title('Distribution of portfolio PnLs for the upper and lower quantiles on age.')
plt.xlabel('PnL')
plt.ylabel('Frequency')
# Summary statistics of order PnL per reason (bare expression: rendered
# only as the last statement of a notebook cell).
df.groupby('reason')['pnl'].describe().T

# Overlaid histograms of portfolio PnL, one per reason, on a fresh figure.
f, ax = plt.subplots(1)
# notnull() gives an explicit per-row mask. Comparing the raw array with
# `!= None` depends on NumPy doing that comparison elementwise, which older
# releases did not.
gps = pdf[pdf.reason.notnull()].groupby('reason')
for gid, gp in gps:
    gp.pnl.hist(alpha=0.6, label=gid)
_ = plt.legend()
# Scatter of portfolio PnL against age, coloured by reason; the regression
# fit is switched off.
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally, and the keyword form works on older releases as well.
sns.lmplot(x="age", y="pnl", data=pdf, hue="reason", fit_reg=False, aspect=1.8)
plt.title('Distribution of portfolio PnLs as a function of age.')
plt.xlabel('Age [s]')
plt.ylabel('PnL')
def load_df(s):
    """Load the cleaned historical odds for sticker *s*, indexed by time.

    The odds are fetched from a ``HistoricalOddsCache`` under the key
    ``'<s>.BF'`` and passed through ``clean_dataframe`` with
    ``min_matched=0``.

    :param s: sticker identifier.
    :return: DataFrame indexed by ``timestamp`` (converted from epoch
        milliseconds), or None when ``clean_dataframe`` returns None.
    """
    from sgmarb.backtesting.data import clean_dataframe
    from stratagemdataprocessing.bookmakers.common.odds.cache import HistoricalOddsCache

    odds_cache = HistoricalOddsCache(parse=False)
    raw_odds = odds_cache.get('%s.BF' % s)
    sdf = clean_dataframe(raw_odds, min_matched=0)
    if sdf is None:
        return None

    sdf['timestamp'] = pd.to_datetime(sdf.timestamp, unit='ms')
    return sdf.set_index('timestamp')
def do_plot(pk):
    """Plot price history and trade windows for every sticker of one pair.

    One subplot is drawn per sticker of the pair for which ``load_df``
    returns data. Each subplot shows the average of the ``bp1`` and ``lp1``
    columns (presumably best back / best lay price -- confirm against the
    odds schema) as a step line. Each portfolio that traded the sticker is
    shaded from its first to its last order: red when the first order was a
    back bet, blue otherwise.

    :param pk: pair key, as stored in the ``pair_key`` column of the global
        orders frame ``df``.
    :return: None. Nothing is drawn when no sticker of the pair has data.
    """
    # Compare element by element so the (tuple) key is always treated as a
    # single value rather than as a sequence to broadcast against the column.
    is_pair = df['pair_key'].map(lambda key: key == pk)
    edf = df[is_pair].sort_values('dt')

    loaded = {s: load_df(s) for s in edf['sticker'].unique()}
    sdfs = {s: sdf for s, sdf in loaded.items() if sdf is not None}
    if not sdfs:
        # plt.subplots() rejects zero rows; there is nothing to draw anyway.
        return

    # squeeze=False always yields a 2-D array of axes, so the single-sticker
    # case (where one sticker has no data) is indexed like the general one.
    f, axes = plt.subplots(len(sdfs), sharex=True, squeeze=False,
                           figsize=(16, 4 * len(sdfs)))
    plt.suptitle(pk)

    for ax, (s, sdf) in zip(axes[:, 0], sdfs.items()):
        mp = (sdf['bp1'] + sdf['lp1']) / 2.0
        ax.plot(sdf.index, mp, drawstyle='steps-post', color='k', alpha=0.5)
        ax.set_title(s)
        # Cap the y-axis at 10 so a few very large prices do not flatten
        # the rest of the line.
        ax.set_ylim([1.0, min(10.0, mp.max() * 1.1)])

        for pid, p in edf[edf.sticker == s].groupby('portfolio_id'):
            open_dt = p['dt'].min()
            close_dt = p['dt'].max()
            # The side of the first order placed decides the shading.
            if p[p.dt == open_dt].iloc[0].is_back:
                color, label = 'red', 'Back'
            else:
                color, label = 'blue', 'Lay'
            ax.axvspan(open_dt, close_dt, color=color, alpha=0.1, label=label)
# Draw one figure per pair, limited to the first 50 distinct pair keys.
pair_keys = df['pair_key'].unique()
for pk in pair_keys[:50]:
    do_plot(pk)